package com.jiao.syntax;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.util.StrUtil;
import com.jiao.syntax.entity.Token;
import com.jiao.syntax.enums.TokenKind;
import com.jiao.syntax.exception.ParseException;
import lombok.Getter;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static com.jiao.syntax.enums.TokenKind.ALLCOLUNM;
import static com.jiao.syntax.enums.TokenKind.PRECOMPILE;

/**
 * Lexer that splits a SQL-like expression string into a list of {@link Token}s.
 *
 * <p>The whole input is tokenized eagerly by the constructor; the result is then
 * available through {@link #getTokens()}. Recognized lexemes are identifiers and the
 * keywords {@code SELECT}/{@code WHERE}/{@code FROM}/{@code LIKE}, integer literals
 * (optionally suffixed with {@code l}/{@code L}), single-quoted string literals,
 * {@code #{name}} placeholders and a small set of operator / punctuation characters.
 * Any other character is rejected with a {@link ParseException}.
 *
 * @Author: vincent.jiao
 * @Date: 2021/4/19
 */
public class Tokenizer {
    /** Decimal digits of {@link Integer#MAX_VALUE}, used to decide whether a literal fits in an int. */
    private static final char[] MAX_INT_DIGITS = String.valueOf(Integer.MAX_VALUE).toCharArray();

    /** The original input, kept for error messages. */
    private String expressionString;

    /** The input characters followed by a single {@code '\0'} sentinel. */
    private char[] charsToProcess;

    /** Index of the next character to examine in {@link #charsToProcess}. */
    private int pos;

    /** Length of {@link #charsToProcess}, sentinel included. */
    private int max;

    /** Tokens produced so far, in input order. */
    private final List<Token> tokens = new ArrayList<Token>();

    /** Small manual demo: tokenizes a sample statement and prints the tokens. */
    public static void main(String[] args) {
        String expression = " select * from users where user_id like #{user_id} and user_id <= 10 ";

        Tokenizer tokens = new Tokenizer(expression);
        System.out.println(tokens.getTokens());
    }

    /**
     * Creates a tokenizer and immediately tokenizes {@code inputData}.
     *
     * @param inputData the expression to tokenize
     * @throws ParseException if the input contains an unrecognized character, an
     *         unterminated string literal or a malformed placeholder
     */
    public Tokenizer(String inputData) {
        init(inputData);
    }

    /**
     * Stores the input, appends the {@code '\0'} sentinel, resets the cursor and runs the lexer.
     */
    private void init(String inputData) {
        this.expressionString = inputData;
        // The trailing sentinel lets the scanning loops look one character ahead
        // without an explicit bounds check.
        this.charsToProcess = (inputData + "\0").toCharArray();
        this.max = this.charsToProcess.length;
        this.pos = 0;

        process();
    }

    /**
     * Main lexer loop: dispatches on the current character until the input is consumed.
     *
     * @return the accumulated token list
     * @throws ParseException if a character does not start any known token
     */
    private List<Token> process() {
        while (this.pos < this.max) {
            char ch = this.charsToProcess[this.pos];
            if (isLetter(ch) || ch == '_') {
                lexIdentifier();
                // pos has moved past the identifier; re-read the current character.
                continue;
            }

            switch (ch) {
                case '>':
                    if (isTwoCharToken(TokenKind.GE)) {
                        pushPairToken(TokenKind.GE);
                    } else {
                        pushCharToken(TokenKind.GT);
                    }
                    break;
                case '<':
                    if (isTwoCharToken(TokenKind.LE)) {
                        pushPairToken(TokenKind.LE);
                    } else {
                        pushCharToken(TokenKind.LT);
                    }
                    break;
                case '!':
                    if (isTwoCharToken(TokenKind.NE)) {
                        pushPairToken(TokenKind.NE);
                    } else {
                        pushCharToken(TokenKind.NOT);
                    }
                    break;
                case '=':
                    pushCharToken(TokenKind.EQ);
                    break;
                case '(':
                    pushCharToken(TokenKind.LPAREN);
                    break;
                case '.':
                    pushCharToken(TokenKind.DOT);
                    break;
                case ',':
                    pushCharToken(TokenKind.COMMA);
                    break;
                case ')':
                    pushCharToken(TokenKind.RPAREN);
                    break;
                case '?':
                    pushCharToken(PRECOMPILE);
                    break;
                case '*':
                    pushCharToken(ALLCOLUNM);
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                case '8':
                case '9':
                    lexNumeric(ch == '0');
                    break;
                case ' ':
                case '\t':
                case '\r':
                case '\n':
                case '\u0000':
                    // Skip white space and the end-of-input sentinel.
                    this.pos++;
                    break;
                case '\'':
                    lexQuotedStringLiteral();
                    break;
                case '#':
                    pushPlaceholderToken();
                    break;
                default:
                    // Without this branch the cursor would never advance and the
                    // loop would spin forever on an unknown character.
                    throw new ParseException("表达式：" + expressionString + " 中存在无法识别的字符 '" + ch + "'",
                            this.pos, this.pos + 1);
            }
        }

        return tokens;
    }

    /**
     * Lexes a run of identifier characters starting at the cursor and emits either a
     * keyword token (SELECT / WHERE / FROM / LIKE) or an IDENTIFIER token.
     */
    private void lexIdentifier() {
        int start = this.pos;
        // The sentinel is not an identifier character, so this always terminates in bounds.
        while (isIdentifier(this.charsToProcess[this.pos])) {
            this.pos++;
        }

        char[] text = subarray(start, this.pos);

        TokenKind kind;
        if (charArrEquals(TokenKind.SELECT.getTokenChars(), text)) {
            kind = TokenKind.SELECT;
        } else if (charArrEquals(TokenKind.WHERE.getTokenChars(), text)) {
            kind = TokenKind.WHERE;
        } else if (charArrEquals(TokenKind.FROM.getTokenChars(), text)) {
            kind = TokenKind.FROM;
        } else if (charArrEquals(TokenKind.LIKE.getTokenChars(), text)) {
            kind = TokenKind.LIKE;
        } else {
            kind = TokenKind.IDENTIFIER;
        }

        this.tokens.add(new Token(kind, text, start, this.pos));
    }

    /**
     * Returns the tokens produced for the input given to the constructor.
     *
     * @return the token list, in input order (the internal list itself, not a copy)
     */
    public List<Token> getTokens() {
        return tokens;
    }

    /**
     * Element-wise comparison of two character arrays.
     */
    private boolean charArrEquals(char[] a1, char[] a2) {
        return Arrays.equals(a1, a2);
    }

    /**
     * Lexes an integer literal starting at the cursor.
     *
     * <p>The literal is a run of decimal digits, optionally followed by an {@code l}/{@code L}
     * suffix. The suffix is honoured (and consumed) only when the character after it cannot
     * continue an identifier, so {@code 10L)} is a long while {@code 10Lx} is the int
     * {@code 10} followed by the identifier {@code Lx}. Without a suffix the literal is a
     * long when its value exceeds {@link Integer#MAX_VALUE}. The token data and end offset
     * cover the digits only, never the suffix.
     *
     * @param firstCharIsZero whether the literal starts with {@code '0'} (currently unused)
     */
    private void lexNumeric(boolean firstCharIsZero) {
        int start = this.pos;
        do {
            this.pos++;
        } while (isNumber(this.charsToProcess[this.pos]));

        int end = this.pos;
        char[] digits = subarray(start, end);

        char next = this.charsToProcess[this.pos];
        // next being a letter implies it is not the sentinel, so pos + 1 is in bounds.
        boolean hasLongSuffix = (next == 'l' || next == 'L')
                && !isIdentifier(this.charsToProcess[this.pos + 1]);
        if (hasLongSuffix) {
            // Consume the suffix so it is not re-lexed as an identifier.
            this.pos++;
        }

        pushIntToken(digits, hasLongSuffix || isLong(digits), start, end);
    }

    /**
     * Tells whether a run of decimal digits denotes a value greater than {@link Integer#MAX_VALUE}.
     *
     * @param cArr decimal digits of a non-negative number; leading zeros are ignored
     * @return {@code true} if the value does not fit in an {@code int}
     */
    private boolean isLong(char[] cArr) {
        int offset = 0;
        while (offset < cArr.length - 1 && cArr[offset] == '0') {
            offset++;
        }

        int significant = cArr.length - offset;
        if (significant != MAX_INT_DIGITS.length) {
            return significant > MAX_INT_DIGITS.length;
        }

        // Same number of digits: the first differing digit decides.
        for (int i = 0; i < significant; i++) {
            char digit = cArr[offset + i];
            if (digit != MAX_INT_DIGITS[i]) {
                return digit > MAX_INT_DIGITS[i];
            }
        }

        return false;
    }

    /**
     * Emits a LITERAL_LONG or LITERAL_INT token for the given digits.
     */
    private void pushIntToken(char[] data, boolean isLong, int start, int end) {
        TokenKind kind = isLong ? TokenKind.LITERAL_LONG : TokenKind.LITERAL_INT;
        this.tokens.add(new Token(kind, data, start, end));
    }

    /**
     * Tells whether the array is non-empty and consists solely of decimal digits.
     */
    private boolean isNumber(char[] cArr) {
        if (cArr.length == 0) {
            return false;
        }
        for (char c : cArr) {
            if (!isNumber(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Lexes a single-quoted string literal starting at the cursor (which is on the opening quote).
     *
     * <p>A doubled quote ({@code ''}) inside the literal does not terminate it; it is kept
     * as-is in the token data. The emitted token covers the text between the quotes.
     *
     * @throws ParseException if the closing quote is missing
     */
    private void lexQuotedStringLiteral() {
        int start = this.pos;
        boolean terminated = false;
        while (!terminated) {
            this.pos++;
            char ch = this.charsToProcess[this.pos];
            if (ch == '\'') {
                if (this.charsToProcess[this.pos + 1] == '\'') {
                    // Escaped quote: skip its second half and keep scanning.
                    this.pos++;
                } else {
                    terminated = true;
                }
            }
            if (isExhausted()) {
                // Reached the sentinel without finding the closing quote.
                throw new ParseException("表达式："+expressionString+" 字符串中，找不到结尾符", start, this.pos);
            }
        }
        this.pos++;

        // Drop the surrounding quotes from the reported range and data.
        start += 1;
        int end = this.pos - 1;
        this.tokens.add(new Token(TokenKind.LITERAL_STRING, subarray(start, end), start, end));
    }

    /**
     * Emits a two-character token at the cursor and advances past it.
     */
    private void pushPairToken(TokenKind kind) {
        this.tokens.add(new Token(kind, this.pos, this.pos + 2));
        this.pos += 2;
    }

    /**
     * Emits a one-character token at the cursor and advances past it.
     */
    private void pushCharToken(TokenKind kind) {
        this.tokens.add(new Token(kind, this.pos, this.pos + 1));
        this.pos++;
    }

    /**
     * Lexes a {@code #{name}} placeholder starting at the cursor (which is on the {@code '#'}).
     *
     * <p>The emitted token data is the full placeholder text including {@code #{} and
     * {@code }}. The cursor is left on the character right after the closing brace.
     *
     * @throws ParseException if {@code '#'} is not followed by <code>'{'</code> or the
     *         closing <code>'}'</code> is missing
     */
    private void pushPlaceholderToken() {
        int start = this.pos;
        // '#' is never the sentinel, so start + 1 is in bounds.
        if (this.charsToProcess[start + 1] != '{') {
            throw new ParseException("token 解析失败, 占位符格式错误, 正确格式为: #{xxx}", start, start + 2);
        }

        int close = start + 2;
        while (close < this.max && this.charsToProcess[close] != '}') {
            close++;
        }
        if (close >= this.max) {
            throw new ParseException("token 解析失败, 占位符缺少结束符 '}', 正确格式为: #{xxx}", start, this.max - 1);
        }

        this.pos = close + 1;
        this.tokens.add(new Token(TokenKind.PLACEHOLDER_35, subarray(start, this.pos), start, this.pos));
    }

    /**
     * Tells whether the two characters at the cursor spell the given two-character token kind.
     */
    private boolean isTwoCharToken(TokenKind kind) {
        char[] chars = kind.getTokenChars();
        return chars.length == 2
                && this.charsToProcess[this.pos] == chars[0]
                && this.charsToProcess[this.pos + 1] == chars[1];
    }

    /**
     * Tells whether the character at the cursor spells the given one-character token kind.
     */
    private boolean isOneCharToken(TokenKind kind) {
        char[] chars = kind.getTokenChars();
        return chars.length == 1 && this.charsToProcess[this.pos] == chars[0];
    }

    /**
     * Copies {@code charsToProcess[start, end)} into a new array.
     */
    private char[] subarray(int start, int end) {
        return Arrays.copyOfRange(this.charsToProcess, start, end);
    }

    /**
     * Identifier characters are ASCII letters, decimal digits and {@code '_'}.
     */
    private boolean isIdentifier(char ch) {
        return isLetter(ch) || isNumber(ch) || ch == '_';
    }

    /** Tells whether {@code c} is an ASCII decimal digit. */
    private boolean isNumber(char c) {
        return c >= '0' && c <= '9';
    }

    /** Tells whether {@code c} is an ASCII letter of either case. */
    private boolean isLetter(char c) {
        return isMinLetter(c) || isMaxLetter(c);
    }

    /** Tells whether {@code c} is a lower-case ASCII letter. */
    private boolean isMinLetter(char c) {
        return c >= 'a' && c <= 'z';
    }

    /** Tells whether {@code c} is an upper-case ASCII letter. */
    private boolean isMaxLetter(char c) {
        return c >= 'A' && c <= 'Z';
    }

    /** Tells whether the cursor is on the trailing {@code '\0'} sentinel. */
    private boolean isExhausted() {
        return (this.pos == this.max - 1);
    }
}
